WildR Brain Tissue Spatial Transcriptomics Analysis

Author

Joe Boktor

Published

April 13, 2024

Background

Here we analyze

Analysis Setup

Environment setup.

library(tidyverse)
library(magrittr)
library(glue)
library(Seurat)
library(grid)
library(biomaRt)
library(SingleR)
library(batchtools)
# stats 
library(lme4)
library(broom)
# parallelization
library(BiocParallel)
library(future)
library(furrr)
# plotting
library(RColorBrewer)
library(ggsci)
library(Matrix)
library(plotly)
library(DT)
library(gtsummary)
library(aplot)
library(patchwork)
library(viridis)
library(vegan)
library(ggpubr)
library(ggside)
library(ggsci)

# setting paths
homedir <- "/central/groups/mthomson/jboktor"
wkdir <- glue("{homedir}/spatial_genomics/jess_2024-01-23")
source(glue("{wkdir}/notebooks/R_scripts/_misc_functions.R"))

# 12gb  limit (1500*1024^2 = 1572864000) * 8
options(future.globals.maxSize= 12582912000)
future::plan("multisession", workers = 32)

Summary stats of data

abca_color_pal <- readRDS(glue("{wkdir}/data/interim/abca_color_pal.rds"))
merged_rois <- readRDS(
  glue("{wkdir}/data/interim/merged_roi_seurat_2024-07-15.rds")
)

# Adding cell annots to object ----
preds_combined <- readRDS(
  glue(
    "{wkdir}/data/interim/alignments/",
    "2024-07-15_ABCA-1&2_SingleR_predictions_combined.rds"
  )
)
# adding singleR labels into seurat object
merged_rois$singleR_labels <-
  preds_combined$labels[
    match(rownames(merged_rois@meta.data), rownames(preds_combined))
  ]
merged_rois$singleR_labels_refined <-
  preds_combined$pruned.labels[
    match(rownames(merged_rois@meta.data), rownames(preds_combined))
  ]

# Adding staNMF data to object ---
PPs_r <- readRDS(
  glue("{wkdir}/data/interim/osNMF/osNMF_PPs_K11_2024-07-15.rds")
)
merged_rois <- Seurat::AddMetaData(
  object = merged_rois,
  metadata = PPs_r
)

merged_rois$slice_id <- 
  glue("{toupper(merged_rois$group)} {merged_rois$run} {merged_rois$roi}")

meta_df <- merged_rois@meta.data %>% glimpse()

Goal is to remove problematic cells characterized by 1. Annotation with a cell type not likely to exist in this bregma region 2. High distortion signal from serial genes (high osNMF 1 signal)

Key questions; 1. Are the cells that are highly represented in osNMF1 only influenced by issues with serial genes, or are there more general issues - caused by image focus. - Are high osNMF1 cells enriched for cell counts or altered in alpha diversity metrics compared to other cells of the same class that are low in osNMF 1

Visualizing cells in osNMF1 at various thresholds

threshold_list <- seq(0, 0.2, by = 0.025) 

for (thesh in threshold_list){
  message(glue("Plotting: {thesh}"))
  p <- merged_rois@meta.data %>%
    rownames_to_column(var = "cell_id") %>%
    filter(osNMF_1 > thesh) %>%
    arrange(osNMF_1) %>%
    mutate(cell_id, factor(cell_id)) %>%
    arrange(group) %>%
    mutate(slice_id, factor(slice_id)) %>%
    ggplot(aes(center_x, center_y, color = osNMF_1)) +
    geom_point(size = 0.15) +
    scale_color_viridis() +
    coord_fixed() +
    scale_y_reverse() +
    facet_wrap(~slice_id, ncol = 4) +
    theme_bw()

  ggsave(
    glue("{wkdir}/figures/osNMF_K11/osNMF1_thresholds/",
    "K11_osNMF1_{thesh}_{Sys.Date()}.png"),
    p,
    width = 16, height = 16
  )
}

Figure 1: Cells selected through a threshold of osNMF_1 > 0.125

Remove cells that are are below x total counts in the reference set

For each cell, the Simpson’s Evenness index is calculated as follows:

  1. Calculate the Simpson’s diversity index \(D\):

\(D = \sum_{i=1}^{S} p_i^2\)

where \(p_i\) is the proportion of the total count for genes \(i\) and \(S\) is the total number of genes.

  1. Calculate the number of genes \(S\):

\(S = \text{specnumber}\)

  1. Calculate the Simpson’s Evenness index $ E $:

\(E = \frac{D}{\log(S)}\)

where \(\log\) is the natural logarithm.

# Define a function to calculate Simpson's Evenness for a single cell
calculate_simpsons_evenness_per_cell <- function(cell_counts) {
  simpson_index <- vegan::diversity(cell_counts, index = "simpson")
  species_count <- vegan::specnumber(cell_counts)
  evenness <- simpson_index / log(species_count)
  return(evenness)
}

counts <- as.matrix(merged_rois@assays$SCT@counts)
counts %>% dim

# Apply the function to each cell
simpsons_evenness <- t(counts) %>%
  calculate_simpsons_evenness_per_cell()

merged_rois <- AddMetaData(merged_rois, 
  metadata = simpsons_evenness, 
  col.name = "simpsons_evenness"
  )

meta_df <- merged_rois@meta.data %>% glimpse()

quantiles <- quantile(meta_df$simpsons_evenness, 
  probs = seq(0, 1, by = 0.01)) %>%
  unique()

meta_df %<>%
  mutate(simpsons_evenness_quantiles = cut(
    simpsons_evenness,
    breaks = quantiles,
    labels = rev(paste0("Q", seq(1, length(quantiles) - 1))),
    include.lowest = TRUE,
    right = FALSE
  )) %>%
  mutate(high_evenness = 
  if_else(simpsons_evenness_quantiles == "Q1", TRUE, FALSE)) %>%
  glimpse()

# save 
meta_df %>%
  select(simpsons_evenness, simpsons_evenness_quantiles, high_evenness) %>%
  saveRDS(
    glue("{wkdir}/data/interim/simpsons_evenness_{Sys.Date()}.rds")
  )
meta_df$high_evenness %>% table

p_nmf1_v_evenness <- meta_df %>%
  ggplot(aes(simpsons_evenness, osNMF_1)) +
  geom_point(size = 0.2, alpha = 0.3, aes(color = high_evenness)) +
  stat_cor(label.x = 0.14, label.y = 0.21) +
  stat_regline_equation(label.x = 0.14, label.y = 0.2) +
  ggside::geom_xsidedensity(aes(y = after_stat(density))) +
  geom_ysidedensity(aes(x = after_stat(density))) +
  labs(x = "Simpson's Evenness", y = "osNMF_1") +
  scale_color_d3() +
  theme_bw() +
  theme(ggside.panel.scale.x = .2, ggside.panel.scale.y = .2) +
  scale_ysidex_continuous(guide = guide_axis(angle = 90), minor_breaks = NULL) 

ggsave(
  glue("{wkdir}/figures/osNMF_K11/osNMF1_vs_evenness_{Sys.Date()}.png"),
  p_nmf1_v_evenness,
  width = 7, height = 7
)


p_nmf1_v_evenness_immune <- meta_df %>%
  filter(singleR_labels == "34 Immune") %>%
  ggplot(aes(simpsons_evenness, osNMF_1)) +
  geom_point(size = 0.2, alpha = 0.3, aes(color = high_evenness)) +
  stat_cor(label.x = 0.14, label.y = 0.21) +
  stat_regline_equation(label.x = 0.14, label.y = 0.2) +
  ggside::geom_xsidedensity(aes(y = after_stat(density))) +
  geom_ysidedensity(aes(x = after_stat(density))) +
  labs(x = "Simpson's Evenness", y = "osNMF_1") +
  theme_bw() +
  scale_color_d3() +
  theme(ggside.panel.scale.x = .2, ggside.panel.scale.y = .2) +
  scale_ysidex_continuous(guide = guide_axis(angle = 90), minor_breaks = NULL) 

ggsave(
  glue("{wkdir}/figures/osNMF_K11/osNMF1_vs_evenness_34-Immune_{Sys.Date()}.png"),
  p_nmf1_v_evenness_immune,
  width = 7, height = 7
)

Figure 2: Simpson’s Evenness by osNMF1 weights for entire dataset

Figure 3: Simpson’s Evenness by osNMF1 weights for Immune cells across all slices

Cells in our osNMF1 dimension show a positive correlation with simpsons evenness, suggesting that these cells are not defined by high expression of a single gene but rather multiple genes. Previous analysis suggests this NMF dimension capture serial gene expression by looking at the weights of genes.

Filtering unlikely cell-types

ref_meta %>% glimpse

ref_meta %>%
  group_by(class) %>%
  summarize(n = n()) %>%
  arrange(n) %>%
  View()

ref_meta %>%
  group_by(class) %>%
  summarize(n = n()) %>%
  filter(n < 600) %>%
  pull(class)

joined_relab_stats %>%
  filter(n_mean_ref < 100) %>% View
  pull(singleR_labels) %>%
  unique()

#' Black list was determined by manually inspecting the data and identifying
#' low abundance cell types that are disporportionately abundant in our seqFISH dataset

black_list <- c(
  "10 LSX GABA",
  "15 HY Gnrh1 Glut",
  "22 MB-HB Sero",
  "23 P Glut",
  "24 MY Glut",
  "25 Pineal Glut",
  "26 P GABA",
  "27 MY GABA", 
  "28 CB GABA",
  "29 CB Glut",
  "32 OEC"
  )

Filtering Data

Criteria for filtering:

  1. Cells with osNMF 1 > 0.125
  2. Cells with simpsons evenness score in the top 1% of the distribution
  3. Annotated as unlikely cell types (blacklist)
# add evenness score to metadata
evenness_meta <- readRDS(
    glue("{wkdir}/data/interim/simpsons_evenness_2024-07-22.rds")
  ) %>% glimpse

merged_rois$high_evenness <- evenness_meta$high_evenness

lowq_cells <- merged_rois %>%
  subset(
    singleR_labels %in% black_list |
    osNMF_1 >= 0.125 |
    high_evenness
  )

# printing a plot of low quality cells
p_lowq <- lowq_cells@meta.data %>%
    ggplot(aes(center_x, center_y, color = singleR_labels)) +
    geom_point(size = 0.1) +
    scale_color_manual(values = abca_color_pal[["class_colors"]]) +
    coord_fixed() +
    scale_y_reverse() +
    facet_wrap(~slice_id, ncol = 4) +
    guides(colour = guide_legend(override.aes = list(size=3))) +
    theme_bw()

ggsave(
  glue("{wkdir}/figures/low-quality-cells_{Sys.Date()}.png"),
  p_lowq,
  width = 16, height = 16
  )

Figure 4: Cells removed from dataset

From 1,076,046 cells to 977,050